library(rlang)
library(readr)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:rlang':
## 
##     set_names

# Préparation des données

library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr     1.1.2     v purrr     1.0.1
## v forcats   1.0.0     v stringr   1.5.0
## v ggplot2   3.4.2     v tibble    3.2.1
## v lubridate 1.9.2     v tidyr     1.3.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x purrr::%@%()         masks rlang::%@%()
## x tidyr::extract()     masks magrittr::extract()
## x dplyr::filter()      masks stats::filter()
## x purrr::flatten()     masks rlang::flatten()
## x purrr::flatten_chr() masks rlang::flatten_chr()
## x purrr::flatten_dbl() masks rlang::flatten_dbl()
## x purrr::flatten_int() masks rlang::flatten_int()
## x purrr::flatten_lgl() masks rlang::flatten_lgl()
## x purrr::flatten_raw() masks rlang::flatten_raw()
## x purrr::invoke()      masks rlang::invoke()
## x dplyr::lag()         masks stats::lag()
## x purrr::set_names()   masks magrittr::set_names(), rlang::set_names()
## x purrr::splice()      masks rlang::splice()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw catalogue. Only `type` gets an explicit column type; the
# remaining columns are left to readr's defaults.
netflix_titles <- read_csv(
  "titles.csv",
  col_types = cols(type = col_character())
)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
##   dat <- vroom(...)
##   problems(dat)
# Reduce `genres` and `production_countries` to the first word-like token
# found in each value (str_extract() returns the first regex match).
data_clean <- mutate(
  netflix_titles,
  genres = str_extract(genres, "\\w+"),
  production_countries = str_extract(production_countries, "\\w+")
)
names(data_clean)
##  [1] "id"                   "title"                "type"                
##  [4] "description"          "release_year"         "age_certification"   
##  [7] "runtime"              "genres"               "production_countries"
## [10] "seasons"              "imdb_id"              "imdb_score"          
## [13] "imdb_votes"           "tmdb_popularity"      "tmdb_score"

Description des variables

[1] “id” : Identifiant unique pour chaque entrée dans le dataset. [2] “title” : Titre du film ou de l’émission. [3] “type” : Type de contenu : “MOVIE” (film) ou “SHOW” (émission). [4] “description” : Description du film ou de l’émission. [5] “release_year” : Année de sortie du film ou de l’émission. [6] “age_certification” : Certification d’âge pour le contenu, indiquant l’âge recommandé pour les spectateurs. [7] “runtime” : Durée en minutes du film ou de l’émission. [8] “genres” : Genres associés au film ou à l’émission. [9] “production_countries” : Pays de production du film ou de l’émission. [10] “seasons” : Nombre de saisons pour les séries télévisées. (NA si non applicable) [11] “imdb_id” : Identifiant IMDb du film ou de l’émission. [12] “imdb_score” : Score IMDb du film ou de l’émission. [13] “imdb_votes” : Nombre de votes IMDb pour le film ou l’émission. [14] “tmdb_popularity” : Popularité du film ou de l’émission sur TMDB (The Movie Database). [15] “tmdb_score” : Score TMDB du film ou de l’émission.

# Print the structure of the prepared data: one line per column with its
# type and first values.
str(data_clean)
## tibble [6,138 x 15] (S3: tbl_df/tbl/data.frame)
##  $ id                  : chr [1:6138] "ts300399" "tm82169" "tm17823" "tm191099" ...
##  $ title               : chr [1:6138] "Five Came Back: The Reference Films" "Rocky" "Grease" "The Sting" ...
##  $ type                : chr [1:6138] "SHOW" "MOVIE" "MOVIE" "MOVIE" ...
##  $ description         : chr [1:6138] "This collection includes 12 World War II-era propaganda films — many of which are graphic and offensive — discu"| __truncated__ "When world heavyweight boxing champion, Apollo Creed wants to give an unknown fighter a shot at the title as a "| __truncated__ "Australian good girl Sandy and greaser Danny fell in love over the summer. But when they unexpectedly discover "| __truncated__ "A novice con man teams up with an acknowledged master to avenge the murder of a mutual friend by pulling off th"| __truncated__ ...
##  $ release_year        : num [1:6138] 1945 1976 1978 1973 1979 ...
##  $ age_certification   : chr [1:6138] "TV-MA" "PG" "PG" "PG" ...
##  $ runtime             : num [1:6138] 51 119 110 129 119 91 109 30 94 120 ...
##  $ genres              : chr [1:6138] "documentation" "drama" "romance" "crime" ...
##  $ production_countries: chr [1:6138] "US" "US" "US" "US" ...
##  $ seasons             : num [1:6138] 1 NA NA NA NA NA NA 4 NA NA ...
##  $ imdb_id             : chr [1:6138] NA "tt0075148" "tt0077631" "tt0070735" ...
##  $ imdb_score          : num [1:6138] NA 8.1 7.2 8.3 7.3 8.2 7.4 8.8 8 7.5 ...
##  $ imdb_votes          : num [1:6138] NA 588100 283316 266738 216307 ...
##  $ tmdb_popularity     : num [1:6138] 0.601 106.361 33.16 24.616 75.699 ...
##  $ tmdb_score          : num [1:6138] NA 7.78 7.41 8.02 7.25 ...
# Print per-column summary statistics (quantiles and NA counts for the
# numeric columns, length/class for the character columns).
summary(data_clean)
##       id               title               type           description       
##  Length:6138        Length:6138        Length:6138        Length:6138       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##   release_year  age_certification     runtime          genres         
##  Min.   :1945   Length:6138        Min.   :  0.00   Length:6138       
##  1st Qu.:2017   Class :character   1st Qu.: 44.00   Class :character  
##  Median :2019   Mode  :character   Median : 80.00   Mode  :character  
##  Mean   :2017                      Mean   : 76.37                     
##  3rd Qu.:2021                      3rd Qu.:105.00                     
##  Max.   :2023                      Max.   :225.00                     
##  NA's   :2                         NA's   :1                          
##  production_countries    seasons           imdb_id            imdb_score   
##  Length:6138          Min.   :   1.000   Length:6138        Min.   :1.500  
##  Class :character     1st Qu.:   1.000   Class :character   1st Qu.:5.800  
##  Mode  :character     Median :   1.000   Mode  :character   Median :6.600  
##                       Mean   :   5.435                      Mean   :6.541  
##                       3rd Qu.:   2.000                      3rd Qu.:7.300  
##                       Max.   :7677.000                      Max.   :9.600  
##                       NA's   :3832                          NA's   :469    
##    imdb_votes        tmdb_popularity       tmdb_score    
##  Min.   :      5.0   Min.   :   0.0094   Min.   : 0.500  
##  1st Qu.:    516.8   1st Qu.:   3.3805   1st Qu.: 6.000  
##  Median :   2093.5   Median :   7.5800   Median : 6.790  
##  Mean   :  21152.7   Mean   :  19.2687   Mean   : 6.633  
##  3rd Qu.:   8885.5   3rd Qu.:  16.5263   3rd Qu.: 7.400  
##  Max.   :2684317.0   Max.   :1078.6370   Max.   :10.000  
##  NA's   :486         NA's   :78          NA's   :254
# Open the spreadsheet-style viewer only in an interactive session: View()
# needs a GUI and would fail or pop up a window when the report is knitted or
# run through Rscript.
if (interactive()) {
  View(data_clean)
}

##Nettoyage et transformation des données

library(tidyverse)
library(dplyr)

# Cleaning pipeline, in this order:
#   1. encode `genres` as a factor,
#   2. drop the identifier / free-text columns that are not analysed,
#   3. keep a single row per title,
#   4. drop every row that still contains a missing value,
#   5. convert to a plain data.frame so that row names can be assigned.
data_clean <- data_clean %>%
  mutate(genres = as.factor(genres)) %>%
  select(-c(seasons, id, description, imdb_id, age_certification)) %>%
  distinct(title, .keep_all = TRUE) %>%
  na.omit() %>%
  as.data.frame()

# Use the title as the row name, then remove it from the columns.
row.names(data_clean) <- data_clean$title
data_clean <- select(data_clean, -title)

# Encode the remaining categorical columns as factors.
data_clean$type <- as.factor(data_clean$type)
data_clean$production_countries <- as.factor(data_clean$production_countries)

# Analyse univariée

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Bar chart of the number of titles per genre, rendered interactively.
#
# The palette length is derived from the genres actually present instead of a
# hard-coded 18, so the plot keeps working when the set of genres changes.
# The former scale_fill_manual() call was removed: no fill aesthetic is
# mapped, so it had no effect. `linewidth` replaces the deprecated `size`
# argument for the bar outline (ggplot2 >= 3.4.0).
n_genres <- length(unique(data_clean$genres))
p <- ggplot(data = data_clean, aes(x = genres)) +
  geom_bar(
    stat = "count",
    position = position_dodge(),
    fill = colorRampPalette(c("red", "steelblue"))(n_genres),
    linewidth = 0.5,
    colour = "black"
  ) +
  ggtitle("Nombre des film / shows par categorie ") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
ggplotly(p)

=> Les deux catégories dominantes sont “drama” et “comedy”.

# Bar chart of the number of titles per content type, rendered interactively.
#
# The text layer gets its own one-row-per-type data so that exactly one label
# is drawn per bar; inheriting the full data set would stack one label per
# title on top of each other. `linewidth` replaces the deprecated `size`
# argument for the bar outline (ggplot2 >= 3.4.0).
dep.plot <- ggplot(data_clean, aes(type)) +
  geom_bar(
    stat = "count",
    position = position_dodge(),
    fill = c("red", "steelblue"),
    linewidth = 0.5,
    colour = "black"
  ) +
  ggtitle("Distibution des individus par type ") +
  geom_text(
    data = distinct(data_clean, type),
    aes(label = ifelse(type == "SHOW", "Show", "MOVIE"), y = 1000),
    vjust = -1.5
  )
ggplotly(dep.plot)

=> Le dataset contient 3299 films (MOVIE) et 2042 émissions (SHOW).

library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# NOTE(review): `c` is not used again in the visible part of the file and
# shares its name with base::c(); confirm whether it is still needed.
c <- highchart()

# Overlay the kernel density estimates of the two scores as area series:
# start from the IMDb density, then add the TMDb density on the same chart.
hec <- hchart(density(data_clean$imdb_score), type = "area", name = "IMDB score")
hec <- hc_add_series(
  hec,
  density(data_clean$tmdb_score),
  type = "area",
  color = "#B71C1C",
  name = "TMDB score"
)
hec
# Column positions of the numeric and of the categorical variables, derived
# from the column types rather than hard-coded so they stay correct if the
# cleaning step adds, drops or reorders columns.
is_numeric_col <- vapply(data_clean, is.numeric, logical(1))
numeric_indices <- unname(which(is_numeric_col))
categorical_indices <- unname(which(!is_numeric_col))

# Correlation matrix of the numeric variables.
data_corr <- cor(
  data_clean[, numeric_indices, drop = FALSE],
  use = "complete.obs"
)

# hchart() draws a heatmap directly from a numeric matrix. The previous
# hcaes() mapping used colnames(data_clean), which also contains the
# categorical columns and therefore did not match the matrix dimensions.
hchart(data_corr)

#Analyse en composantes principales

library(FactoMineR)
# Principal component analysis on the standardised variables; the columns
# listed in `categorical_indices` are passed as supplementary qualitative
# variables.
res.pca <- PCA(
  X = data_clean,
  scale.unit = TRUE,
  quali.sup = categorical_indices
)

#Etude de l’inertie

library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Kaiser criterion: test only the eigenvalue column against 1. Comparing the
# whole `eig` matrix also tested the percentage columns, which is meaningless.
res.pca$eig[, "eigenvalue"] > 1
## comp 1 comp 2 comp 3 comp 4 comp 5 comp 6 
##   TRUE   TRUE   TRUE  FALSE  FALSE  FALSE 
# Scree plot with the percentage printed on each bar. The argument is spelled
# `addlabels`; the former `addLabels` did not match it.
fviz_screeplot(res.pca, addlabels = TRUE)

=> D’après la méthode de Kaiser, on retient les dimensions dont la valeur propre est supérieure à 1 ; dans notre cas, on retient donc la première, la deuxième et la troisième dimension.

=> Selon la méthode du coude, on se limite à la dimension précédant la chute la plus conséquente et la plus visible ; or, dans notre cas, la variation entre les dimensions n’est ni brusque ni importante.

#analyse des variables

library(factoextra)
# Draw the variables map three times, colouring the arrows by cos2, by
# contribution and by coordinate in turn.
for (colour_by in c("cos2", "contrib", "coord")) {
  print(
    fviz_pca_var(
      res.pca,
      geom = c("arrow", "text"),
      repel = TRUE,
      col.var = colour_by
    )
  )
}

Dim.1 : Cette dimension est principalement caractérisée par les variables “release_year” (année de sortie), “runtime” (durée), “imdb_votes” (votes IMDb), “tmdb_popularity” (popularité TMDB) et “tmdb_score” (score TMDB). Elle semble capturer des aspects liés à la popularité, à la durée et à la réception des films. Des valeurs plus élevées sur cette dimension indiquent des films plus récents, plus longs, avec un plus grand nombre de votes sur IMDb, une plus grande popularité sur TMDB et des scores plus élevés sur TMDB. Cette dimension pourrait être associée au succès global ou à la reconnaissance des films.

Dim.2 : La deuxième dimension est principalement influencée par la variable “imdb_score” (score IMDb). Elle représente la qualité ou la note des films selon IMDb. Des valeurs plus élevées sur cette dimension indiquent des films avec des scores IMDb plus élevés, ce qui correspond à de meilleures évaluations globales. Cette dimension se concentre spécifiquement sur la qualité perçue des films selon les notes IMDb.

#Analyse des individus

# Individuals map, points and labels, coloured by cos2. The echoed ggrepel
# warning reports that most labels could not be placed.
fviz_pca_ind(res.pca,geom = c("point","text"),repel = TRUE,col.ind = 'cos2')
## Warning: ggrepel: 5332 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Same map, coloured by contribution.
fviz_pca_ind(res.pca,geom = c("point","text"),repel = TRUE,col.ind = 'contrib')
## Warning: ggrepel: 5332 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Same map, coloured by coordinate.
fviz_pca_ind(res.pca,geom = c("point","text"),repel = TRUE,col.ind = 'coord')
## Warning: ggrepel: 5332 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# Individuals map coloured by content type, with one ellipse per group.
# The argument is `ellipse.type`; the former spelling `ellips.type` did not
# match it, so the "confidence" ellipse type was never requested.
fviz_pca_ind(
  res.pca,
  geom.ind = "point",
  col.ind = data_clean$type,
  addEllipses = TRUE,
  ellipse.type = "confidence",
  repel = TRUE,
  legend.title = "Group",
  title = "PCA - Individuals"
)

#Classification Hierarchique Ascendante

# Standardise the numeric variables (centred and scaled).
scaled_data <- scale(data_clean[, numeric_indices], center = TRUE)

# Cluster the first 500 rows at most; bounding by nrow() avoids a
# subscript-out-of-bounds error on a data set with fewer than 500 rows.
hca_rows <- seq_len(min(500L, nrow(scaled_data)))
d1 <- dist(scaled_data[hca_rows, , drop = FALSE], method = "euclidean")

# NOTE(review): ?hclust states that "ward.D" only implements Ward's criterion
# when fed squared distances, whereas "ward.D2" squares them internally.
# Kept as "ward.D" here so the results shown below stay valid; confirm
# whether "ward.D2" was intended.
h1 <- hclust(d1, method = "ward.D")
h1$method
## [1] "ward.D"
h1$order
##   [1] 114 115 126 134 164 176 108 255 409 410 422 423 133 431 124 268 199 214
##  [19] 191 206 244  89  91 188 192 497 447 238 461 121 334 196 211 162 171 112
##  [37] 285 104 113 308 316 130 310 127 287 111 100 118  54 120 240 298 101 123
##  [55]  82 236 208 325 215 348 216 197 272 227 291 292 223 242 311 249 251 253
##  [73] 354 286 446 213 438 444 248 335 204 237 222 269 315 256 374 274 275 440
##  [91] 495 469 402 496 303 190 221 455 293 267 281 284 386 404 353 376 318 427
## [109] 294 419 347 435 383 301 416 475 448 345 361 319 464 273 398 385 442 468
## [127] 470 349 418 463 352 414 467 332 392 363 425 493 283 390 143 342 366 389
## [145] 279 346 471 387 405 327 441 321 326 451 456 276 343 296 381 369 331 450
## [163] 443 483 397 485 250 429 388 362 413 306 486 488 399 411 230 403 373 382
## [181] 433 434 394 395 408 428 351 426 270 462 329 340 417 359 458 320 309 484
## [199] 368 421 384 436 437 407 491 489 460 492 314 452 478 481 333 271 482 358
## [217] 459 219 379  98 145 302 312 307 377 280 393 234 337 290 254 266 232 278
## [235] 220 257 233 245 262 225 228 263  96 239 243 246 224 235 341 264 259 317
## [253] 241 300 305 177 151 372 375 295 378 153 138 148 163 313 499 324 339 453
## [271] 449 371 465 479 328 415 380 391 457 288 364 260 323 146 258 396 477 304
## [289] 356 412 473 498 277 472 365 344 289 297 350 355 282 370 330 406 476 487
## [307] 140 159 430 474 432 454 338 420 445 480 494 322 400 401 466 336 357 424
## [325] 439 367 360 490  86 202 189 203 180 195   7  17  12   9  27  26  19  22
## [343]  24  31  23  11  16  25  14  10  30  15  34  21  32  53  29  81  28  33
## [361]  18  56  60  45  20  79  94 109 128  95 129  58  47  51  46  61 110  37
## [379]  42 167  65  80  41  49  75  77  63  13  57 172 179 160 174 102 117 116
## [397] 149 107 122 105  97 135 103 106 229  85  90  99 119  43  44  71  73  68
## [415] 170  62 141 161 132 136 137 152 158 125 142 173 166  70 147 157 169 139
## [433] 155 165 131 178  52  76  59  78  72  69  74  55  66  50  48  64  67 144
## [451] 150 154 168 156 175  84 185 187 500 181 182  87 200 205 265 226 299  92
## [469]  93 207 252 209 217 261 218 247 193 212 231 194 210 184 183 186 198  35
## [487]  88  83 201   3   6  39  38  40   2   4   5   8   1  36
# Print the merge matrix of the hierarchical clustering (one row per
# agglomeration step).
h1$merge
##        [,1] [,2]
##   [1,] -309 -484
##   [2,] -282 -370
##   [3,] -409 -410
##   [4,] -340 -417
##   [5,] -250 -429
##   [6,] -280 -393
##   [7,] -267 -281
##   [8,] -314 -452
##   [9,] -362 -413
##  [10,] -406 -476
##  [11,] -288 -364
##  [12,] -480 -494
##  [13,] -443 -483
##  [14,] -283 -390
##  [15,] -336 -357
##  [16,] -251 -253
##  [17,] -412 -473
##  [18,] -351 -426
##  [19,] -408 -428
##  [20,] -142 -173
##  [21,] -418 -463
##  [22,] -196 -211
##  [23,] -284 -386
##  [24,] -256 -374
##  [25,] -486 -488
##  [26,] -162 -171
##  [27,] -320    1
##  [28,] -148 -163
##  [29,] -355    2
##  [30,] -273 -398
##  [31,] -270 -462
##  [32,] -394 -395
##  [33,] -436 -437
##  [34,]    7   23
##  [35,] -407 -491
##  [36,]  -37  -42
##  [37,] -460 -492
##  [38,] -306   25
##  [39,] -399 -411
##  [40,] -304 -356
##  [41,]  -59  -78
##  [42,] -114 -115
##  [43,] -391 -457
##  [44,] -127 -287
##  [45,] -363 -425
##  [46,] -345 -361
##  [47,] -493   14
##  [48,] -295 -378
##  [49,] -353 -376
##  [50,] -248 -335
##  [51,]  -70 -147
##  [52,]  -65  -80
##  [53,] -307 -377
##  [54,] -232 -278
##  [55,] -279 -346
##  [56,] -385 -442
##  [57,] -277 -472
##  [58,] -228 -263
##  [59,] -289 -297
##  [60,] -274 -275
##  [61,] -243 -246
##  [62,] -126 -134
##  [63,] -397 -485
##  [64,] -319 -464
##  [65,] -204 -237
##  [66,] -466   15
##  [67,] -152 -158
##  [68,] -422 -423
##  [69,] -478 -481
##  [70,] -332 -392
##  [71,]  -99 -119
##  [72,] -388    9
##  [73,] -339 -453
##  [74,] -125   20
##  [75,] -368 -421
##  [76,] -404   49
##  [77,] -318 -427
##  [78,] -190 -221
##  [79,] -167   52
##  [80,] -230 -403
##  [81,] -359 -458
##  [82,]  -48  -64
##  [83,] -371 -465
##  [84,] -100 -118
##  [85,] -199 -214
##  [86,] -498   57
##  [87,] -373 -382
##  [88,]  -94 -109
##  [89,]   18   31
##  [90,]  -16  -25
##  [91,] -467   70
##  [92,] -434   32
##  [93,]  -13  -57
##  [94,] -360 -490
##  [95,] -238 -461
##  [96,] -155 -165
##  [97,] -215 -348
##  [98,] -104 -113
##  [99,] -116 -149
## [100,] -331 -450
## [101,]  -56  -60
## [102,] -157 -169
## [103,] -260 -323
## [104,] -254 -266
## [105,] -164 -176
## [106,] -430 -474
## [107,] -138   28
## [108,] -124 -268
## [109,] -102 -117
## [110,]   24   60
## [111,] -338 -420
## [112,] -310   44
## [113,] -235 -341
## [114,] -349   21
## [115,] -344   59
## [116,] -107 -122
## [117,] -321 -326
## [118,] -396 -477
## [119,]  -71  -73
## [120,] -156 -175
## [121,] -151 -372
## [122,]  -29  -81
## [123,] -213 -438
## [124,] -448   46
## [125,] -291 -292
## [126,]  -76   41
## [127,]   13   63
## [128,]  -75  -77
## [129,]  -41  -49
## [130,] -106 -229
## [131,] -300 -305
## [132,] -424 -439
## [133,]  -20  -79
## [134,] -324   73
## [135,] -489   37
## [136,] -259 -317
## [137,]  -95 -129
## [138,] -352 -414
## [139,]   27   75
## [140,]  -46  -61
## [141,] -293   34
## [142,] -153  107
## [143,]   38   39
## [144,]  -38  -40
## [145,]   45   47
## [146,] -172 -179
## [147,]    5   72
## [148,] -234 -337
## [149,] -327 -441
## [150,] -367   94
## [151,] -225   58
## [152,] -400 -401
## [153,] -222 -269
## [154,] -389   55
## [155,] -455  141
## [156,] -301 -416
## [157,]   62  105
## [158,] -206 -244
## [159,] -432 -454
## [160,]  -10  -30
## [161,] -451 -456
## [162,] -264  136
## [163,] -132 -136
## [164,] -220 -257
## [165,]  -15  -34
## [166,]  -55  -66
## [167,] -224  113
## [168,] -242 -311
## [169,] -112 -285
## [170,] -325   97
## [171,] -308 -316
## [172,]   17   86
## [173,] -447   95
## [174,] -286 -446
## [175,]   19   89
## [176,] -499  134
## [177,] -207 -252
## [178,]  -47  -51
## [179,] -144 -150
## [180,] -405  149
## [181,] -131 -178
## [182,] -329    4
## [183,] -137   67
## [184,] -177  121
## [185,]    3   68
## [186,] -296 -381
## [187,] -245 -262
## [188,] -330   10
## [189,] -271 -482
## [190,] -445   12
## [191,]  -96 -239
## [192,] -444   50
## [193,] -365  115
## [194,]   99  116
## [195,]   -9  -27
## [196,] -111   84
## [197,]  132  150
## [198,]  -89  -91
## [199,] -240 -298
## [200,]  -97 -135
## [201,]  -43  -44
## [202,]  -52  126
## [203,] -121 -334
## [204,] -249   16
## [205,] -101 -123
## [206,]  -92  -93
## [207,] -369  100
## [208,]    8   69
## [209,] -166   51
## [210,]  -50   82
## [211,] -205 -265
## [212,]   33   35
## [213,] -328 -415
## [214,] -294 -419
## [215,]   11  103
## [216,] -440 -495
## [217,]  -18  101
## [218,] -475  124
## [219,] -186 -198
## [220,] -133 -431
## [221,] -103  130
## [222,] -168  120
## [223,]   30   56
## [224,]   76   77
## [225,] -189 -203
## [226,] -303   78
## [227,]  114  138
## [228,] -354  174
## [229,]  163  183
## [230,] -128  137
## [231,]  117  161
## [232,]   48  142
## [233,]  -11   90
## [234,] -227  125
## [235,]  -53  122
## [236,] -191  158
## [237,] -209 -217
## [238,]   65  153
## [239,]  106  159
## [240,] -497  173
## [241,] -194 -210
## [242,] -347 -435
## [243,] -350   29
## [244,] -226 -299
## [245,] -223  168
## [246,]   61  167
## [247,] -143 -342
## [248,]  -82 -236
## [249,] -110   36
## [250,] -315  110
## [251,]   54  164
## [252,] -197 -272
## [253,] -258  118
## [254,]  109  194
## [255,]   42  157
## [256,] -313  176
## [257,] -146  253
## [258,]  -62 -141
## [259,] -218 -247
## [260,]   98  171
## [261,]  -68 -170
## [262,]   91  145
## [263,]   80   87
## [264,] -290  104
## [265,]   22   26
## [266,]  -69  -74
## [267,] -140 -159
## [268,]  -83 -201
## [269,]   40  172
## [270,] -154  222
## [271,] -449   83
## [272,]  169  260
## [273,]  -21  -32
## [274,] -384  212
## [275,]  -85  -90
## [276,]   64  223
## [277,] -276 -343
## [278,]   -2   -4
## [279,] -468 -470
## [280,] -402 -496
## [281,] -233  187
## [282,] -105  200
## [283,] -433   92
## [284,]  -63   93
## [285,] -241  131
## [286,]  -54 -120
## [287,] -479  213
## [288,] -161  229
## [289,] -139   96
## [290,] -322  152
## [291,]  127  147
## [292,]   88  230
## [293,] -216  252
## [294,]  162  285
## [295,] -160 -174
## [296,]   -6  -39
## [297,] -183  219
## [298,] -193 -212
## [299,] -387  180
## [300,]    6  148
## [301,]  -28  -33
## [302,]  -12  195
## [303,] -380   43
## [304,] -208  170
## [305,]  -19  -22
## [306,] -383  156
## [307,]  111  190
## [308,] -188 -192
## [309,] -108 -255
## [310,]  -72  266
## [311,]  151  191
## [312,]   81  139
## [313,] -366  154
## [314,]  140  249
## [315,]  175  182
## [316,] -130  112
## [317,]   85  236
## [318,] -302 -312
## [319,]   79  129
## [320,]  143  263
## [321,]  -14  160
## [322,]  135  208
## [323,]  204  228
## [324,]  177  237
## [325,]   66  197
## [326,]  234  245
## [327,]  242  306
## [328,]  247  313
## [329,]  239  307
## [330,]  186  207
## [331,]  238  250
## [332,]  179  270
## [333,]  102  289
## [334,]  -26  305
## [335,]  221  275
## [336,] -184  297
## [337,]   -7  -17
## [338,]  272  316
## [339,]  185  220
## [340,]   -5   -8
## [341,] -375  232
## [342,] -261  259
## [343,] -219 -379
## [344,]  181  202
## [345,]  203  265
## [346,]  166  210
## [347,]  -45  133
## [348,]  211  244
## [349,]  193  243
## [350,]  146  295
## [351,]  144  278
## [352,] -487  267
## [353,]  -88  268
## [354,]  123  192
## [355,]  -84 -185
## [356,]  165  273
## [357,]  205  248
## [358,] -358 -459
## [359,]  108  317
## [360,] -471  299
## [361,]  256  271
## [362,]  235  301
## [363,]  155  224
## [364,]  -58  178
## [365,]   71  201
## [366,]  -98 -145
## [367,]  254  282
## [368,]  218  276
## [369,]  274  322
## [370,]  251  281
## [371,]   53  300
## [372,]  217  347
## [373,]  209  333
## [374,] -469  280
## [375,]  -24  -31
## [376,]  258  288
## [377,]  196  286
## [378,]  287  303
## [379,]  198  308
## [380,]  206  324
## [381,]  314  319
## [382,]  227  262
## [383,]  128  284
## [384,]  309  339
## [385,] -231  241
## [386,]  290  325
## [387,]  296  351
## [388,]   -1  -36
## [389,]  293  326
## [390,]  215  257
## [391,]  264  370
## [392,]  240  345
## [393,]  321  356
## [394,]  226  363
## [395,] -202  225
## [396,]  184  341
## [397,]  246  294
## [398,]  277  330
## [399,]  188  352
## [400,]  214  327
## [401,]   -3  387
## [402,]  359  379
## [403,]  189  358
## [404,]   74  373
## [405,]  199  357
## [406,]  348  380
## [407,]  340  388
## [408,]  310  346
## [409,]  328  360
## [410,]  233  393
## [411,]  279  382
## [412,]  343  366
## [413,]  304  389
## [414,]  269  349
## [415,]  318  371
## [416,]  312  369
## [417,]  291  320
## [418,]  334  375
## [419,]  -23  410
## [420,]  298  385
## [421,]  119  261
## [422,]  342  420
## [423,]  361  378
## [424,]  231  398
## [425,]  344  408
## [426,]  362  372
## [427,]  292  364
## [428,]  255  384
## [429,]  311  397
## [430,]  323  354
## [431,]  -87 -200
## [432,]  -35  353
## [433,]  216  374
## [434,]  315  416
## [435,]  338  377
## [436,]  283  434
## [437,] -187 -500
## [438,]  302  418
## [439,]  390  414
## [440,]  329  386
## [441,] -333  403
## [442,]  -86  395
## [443,]  412  415
## [444,]  -67  332
## [445,]  392  435
## [446,]  350  367
## [447,]  381  383
## [448,]  335  365
## [449,]  355  437
## [450,]  368  411
## [451,]  405  413
## [452,]  417  436
## [453,]  331  433
## [454,]  391  429
## [455,]  376  404
## [456,]  337  438
## [457,]  399  440
## [458,]  419  426
## [459,]  406  422
## [460,]  430  453
## [461,]  446  448
## [462,]  409  424
## [463,]  396  423
## [464,]  336  432
## [465,]  421  455
## [466,] -181 -182
## [467,]  439  457
## [468,]  401  407
## [469,]  451  460
## [470,]  425  444
## [471,]  394  400
## [472,] -180 -195
## [473,]  450  462
## [474,]  402  445
## [475,]  442  472
## [476,]  447  461
## [477,]  449  466
## [478,]  428  474
## [479,]  431  459
## [480,]  441  443
## [481,]  427  476
## [482,]  456  458
## [483,]  469  471
## [484,]  465  470
## [485,]  463  467
## [486,]  478  483
## [487,]  481  484
## [488,]  464  468
## [489,]  454  485
## [490,]  479  488
## [491,]  480  489
## [492,]  452  491
## [493,]  482  487
## [494,]  473  492
## [495,]  477  490
## [496,]  493  495
## [497,]  486  494
## [498,]  475  496
## [499,]  497  498
library(dendextend)
## 
## ---------------------
## Welcome to dendextend version 1.15.2
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
# Convert the clustering to a dendrogram object and print its summary.
hcd <- as.dendrogram(h1)
hcd
## 'dendrogram' with 2 branches and 500 members total, at height 221.3302
# Plot the tree after raising it by 100 with raise.dendrogram().
hcd %>%
  raise.dendrogram(100) %>%
  plot(main = "Raised tree")

library(clValid)
## Loading required package: cluster
# NOTE(review): clValid() documents a `maxitems` argument (default 600);
# confirm that this option is actually read by the package.
options(clValid.maxitems = 500)

# Validate on the first 500 rows at most; bounding by nrow() avoids a
# subscript-out-of-bounds error on a smaller data set.
subset_rows <- seq_len(min(500L, nrow(scaled_data)))
subset_data <- scaled_data[subset_rows, , drop = FALSE]

# Internal validation (connectivity, Dunn, silhouette) for 2 to 6 clusters.
# The argument is spelled `clMethods`; the former `clmethods` did not match
# it and the call only worked because "hierarchical" is also the default.
# NOTE(review): clValid's hierarchical clustering uses its own `method`
# argument (default "average"), which differs from the "ward.D" linkage used
# for `h1`; confirm that validating a different linkage is intended.
intern <- clValid(
  subset_data,
  2:6,
  clMethods = "hierarchical",
  validation = "internal"
)
summary(intern)
## 
## Clustering Methods:
##  hierarchical 
## 
## Cluster sizes:
##  2 3 4 5 6 
## 
## Validation Measures:
##                                  2       3       4       5       6
##                                                                   
## hierarchical Connectivity   4.2143 10.1143 12.7726 14.9810 18.2238
##              Dunn           0.3661  0.2511  0.2511  0.2511  0.2511
##              Silhouette     0.8236  0.7123  0.6794  0.6417  0.6393
## 
## Optimal Scores:
## 
##              Score  Method       Clusters
## Connectivity 4.2143 hierarchical 2       
## Dunn         0.3661 hierarchical 2       
## Silhouette   0.8236 hierarchical 2

=> Suite aux tests de validation, on peut retenir un nombre optimal de clusters égal à 2.

## Ajout des classes au jeu de données et visualisation des clusters sur les plans de l’ACP

# Cut the hierarchical tree into two clusters and print the assignment
# (a vector named by title, as the echoed output shows).
classes <- h1 %>% cutree(k = 2)
classes
##                                                                           Rocky 
##                                                                               1 
##                                                                          Grease 
##                                                                               1 
##                                                                       The Sting 
##                                                                               1 
##                                                                        Rocky II 
##                                                                               1 
##                                                 Monty Python and the Holy Grail 
##                                                                               1 
##                                                                    Animal House 
##                                                                               1 
##                                                    Monty Python's Flying Circus 
##                                                                               1 
##                                                                   Life of Brian 
##                                                                               1 
##                                                                 White Christmas 
##                                                                               1 
##                                                                          Heroes 
##                                                                               1 
##                                                               Play Misty for Me 
##                                                                               1 
##                                                                   Cairo Station 
##                                                                               1 
##                                                  Richard Pryor: Live in Concert 
##                                                                               1 
##                                                                          Bandie 
##                                                                               1 
##                                                                          Prince 
##                                                                               1 
##                                                                             FTA 
##                                                                               1 
##                                                Monty Python's Fliegender Zirkus 
##                                                                               1 
##                                                                Hitler: A Career 
##                                                                               1 
##                                                                        Amrapali 
##                                                                               1 
##                                                             Alibaba Aur 40 Chor 
##                                                                               1 
##                                                                      Manoranjan 
##                                                                               1 
##                                                                       Professor 
##                                                                               1 
##                                                                     Khoon Khoon 
##                                                                               1 
##                                                          Saladin the Victorious 
##                                                                               1 
##                                                  The Other Side of the Mountain 
##                                                                               1 
##                                                                     Dark Waters 
##                                                                               1 
##                                                                 The Blazing Sun 
##                                                                               1 
##                                                  The Return of the Prodigal Son 
##                                                                               1 
##                                                                Alexandria… Why? 
##                                                                               1 
##                                                                       Salaakhen 
##                                                                               1 
##                                                                        The Land 
##                                                                               1 
##                                                                           Elaan 
##                                                                               1 
##                                                                        Whispers 
##                                                                               1 
##                                                                     Lal Patthar 
##                                                                               1 
##                                                                        Seinfeld 
##                                                                               1 
##                                                                         Top Gun 
##                                                                               1 
##                                                                      Road House 
##                                                                               1 
##                                                                        Rocky IV 
##                                                                               1 
##                                                                       Labyrinth 
##                                                                               1 
##                                                                       Rocky III 
##                                                                               1 
##                                                                          Fletch 
##                                                                               1 
##                                                                      The 'Burbs 
##                                                                               1 
##                                                                         Rocky V 
##                                                                               1 
##                                                                           Twins 
##                                                                               1 
##                                                                   Out of Africa 
##                                                                               1 
##                                                                      Parenthood 
##                                                                               1 
##                                                                Thomas & Friends 
##                                                                               1 
##                                                                    Legal Eagles 
##                                                                               1 
##                                                             She's Gotta Have It 
##                                                                               1 
##                                                                  Strange Voices 
##                                                                               1 
##                                                                     Fireman Sam 
##                                                                               1 
##                                                     In Defense of a Married Man 
##                                                                               1 
##                                                                The Four Seasons 
##                                                                               1 
##                        Parrot Sketch Not Included: Twenty Years of Monty Python 
##                                                                               2 
##                                                                 The Little Wars 
##                                                                               1 
##                                     Mobile Suit Gundam III: Encounters in Space 
##                                                                               1 
##                                         Monty Python Live at the Hollywood Bowl 
##                                                                               1 
##                                                                    Danger Mouse 
##                                                                               1 
##                                                                Unspeakable Acts 
##                                                                               1 
##                                       Mobile Suit Gundam II: Soldiers of Sorrow 
##                                                                               1 
##                                        Mobile Suit Gundam: Char's Counterattack 
##                                                                               1 
##                                                                       Agneepath 
##                                                                               1 
##                                                          Waiting for the Hearse 
##                                                                               1 
##                                                        The George McKenna Story 
##                                                                               1 
##                                                            The Ryan White Story 
##                                                                               1 
##                                                                    Disco Dancer 
##                                                                               1 
##                                                                         Dostana 
##                                                                               1 
##                                                                          Mujrim 
##                                                                               1 
##                                                                   Sohni Mahiwal 
##                                                                               1 
##                                                                             Dil 
##                                                                               1 
##                                                                  Pyar Ke Do Pal 
##                                                                               1 
##                                                                            Jaal 
##                                                                               1 
##                                                                          Duniya 
##                                                                               1 
##                                                                  Aakhri Adaalat 
##                                                                               1 
##                                      Quiet Victory: The Charlie Wedemeyer Story 
##                                                                               1 
##                                                   Alexandria, Again and Forever 
##                                                                               1 
##                                                        Adam: His Song Continues 
##                                                                               1 
##                                                      A Stoning in Fulham County 
##                                                                               1 
##                                                                Ek Jaan Hain Hum 
##                                                                               1 
##                                                              Too Young The Hero 
##                                                                               1 
##                                                               An Egyptian Story 
##                                                                               1 
##                                                                        Survivor 
##                                                                               2 
##                                                                       One Piece 
##                                                                               1 
##                                                                         Pokémon 
##                                                                               1 
##                                                            Sleepless in Seattle 
##                                                                               1 
##                                                                    Forrest Gump 
##                                                                               1 
##                                                                  Reservoir Dogs 
##                                                                               1 
##                                                                   Gilmore Girls 
##                                                                               1 
##                                                                    Cowboy Bebop 
##                                                                               2 
##                                                                    Galaxy Quest 
##                                                                               1 
##                                                         Neon Genesis Evangelion 
##                                                                               2 
##                                                                   Jerry Maguire 
##                                                                               1 
##                                                                    Notting Hill 
##                                                                               1 
##                                                                  The Real World 
##                                                                               1 
##                                                                   Power Rangers 
##                                                                               1 
##                                                                       Big Daddy 
##                                                                               2 
##                                                          The Quick and the Dead 
##                                                                               1 
##                                                                     Big Brother 
##                                                                               2 
##                                                 I Know What You Did Last Summer 
##                                                                               1 
##                                                            The Magic School Bus 
##                                                                               2 
##                                                                        Inuyasha 
##                                                                               2 
##                                                           Kicking and Screaming 
##                                                                               1 
##                                                            Seven Years in Tibet 
##                                                                               1 
##                                                                       Yu-Gi-Oh! 
##                                                                               2 
##                                                                         Stepmom 
##                                                                               1 
##                                                               The Mask of Zorro 
##                                                                               1 
##                                                                            Life 
##                                                                               1 
##                                  Neon Genesis Evangelion: The End of Evangelion 
##                                                                               2 
##                                                Adventures of Sonic the Hedgehog 
##                                                                               1 
##                                                                         My Girl 
##                                                                               1 
##                                                                     Kenan & Kel 
##                                                                               2 
##                                                                     Girlfriends 
##                                                                               2 
##                                                                               H 
##                                                                               2 
##                               Jerry Seinfeld: I'm Telling You for the Last Time 
##                                                                               2 
##                                                                   The Last Days 
##                                                                               2 
##                                                                        Croupier 
##                                                                               1 
##                                                                           Heavy 
##                                                                               1 
##                                                                        All That 
##                                                                               2 
##                                                             The Nutty Professor 
##                                                                               1 
##                                                                 Heartbreak High 
##                                                                               2 
##                                                                     The Wiggles 
##                                                                               2 
##                                                               Edge of Seventeen 
##                                                                               1 
##                                                               Cardcaptor Sakura 
##                                                                               2 
##                                                                          Okupas 
##                                                                               2 
##                                                                         Sankofa 
##                                                                               1 
##                                       A Chinese Odyssey Part One: Pandora's Box 
##                                                                               2 
##                                                                     The Parkers 
##                                                                               2 
##                                                                          Moesha 
##                                                                               1 
##                                                                  Sister, Sister 
##                                                                               1 
##                                                        Before the Flying Circus 
##                                                                               2 
##                                                                    Maya Memsaab 
##                                                                               1 
##                                                                        Dil Se.. 
##                                                                               1 
##                                                                     Herod's Law 
##                                                                               2 
##                                          A Chinese Odyssey Part Two: Cinderella 
##                                                                               2 
##                                                                     Blue Streak 
##                                                                               1 
##                                                            Kabhi Haan Kabhi Naa 
##                                                                               1 
##                                                                          Bombay 
##                                                                               1 
##                                                     Phir Bhi Dil Hai Hindustani 
##                                                                               2 
##                                                                         Chaahat 
##                                                                               1 
##                                                  Nutty Professor II: The Klumps 
##                                                                               2 
##                                                          Hum Aapke Hain Koun..! 
##                                                                               1 
##                                                                      Wild Tango 
##                                                                               1 
##                                                              Kuch Kuch Hota Hai 
##                                                                               2 
##                                                                     Yaar Gaddar 
##                                                                               1 
##                                                                Barney & Friends 
##                                                                               2 
##                                                                       The Other 
##                                                                               2 
##                                                                       Chamatkar 
##                                                                               1 
##                                                                            Mann 
##                                                                               2 
##                                                    Chronicle of a Disappearance 
##                                                                               1 
##                                                      Oh Darling! Yeh Hai India! 
##                                                                               1 
##                                                                       Duplicate 
##                                                                               2 
##                                                                           Muthu 
##                                                                               1 
##                                                            Hum Saath Saath Hain 
##                                                                               2 
##                                                                       Ram Jaane 
##                                                                               1 
##                                                                          Anjaam 
##                                                                               1 
##                                                           English Babu Desi Mem 
##                                                                               1 
##                                                                         Destiny 
##                                                                               1 
##                                                                  Avvai Shanmugi 
##                                                                               1 
##                                                                   Hello Brother 
##                                                                               2 
##                                                   Sinbad: Afros and Bellbottoms 
##                                                                               1 
##                                                                          Damini 
##                                                                               1 
##                                                     Monty Python: Live at Aspen 
##                                                                               2 
##                                                                           Jeans 
##                                                                               2 
##                                                                     West Beirut 
##                                                                               2 
##                                                                          Gumrah 
##                                                                               1 
##                                                                         Shikari 
##                                                                               1 
##                                                                     Out of Life 
##                                                                               1 
##                                                                           Yodha 
##                                                                               1 
##                                                                  Minsara Kanavu 
##                                                                               1 
##                                                                        Dushmani 
##                                                                               1 
##                                                             Children of Shatila 
##                                                                               2 
##                                                    Sinbad: Nothin' but the Funk 
##                                                                               1 
##                                                                    The Emigrant 
##                                                                               1 
##                                    A Triumph of the Heart: The Ricky Bell Story 
##                                                                               1 
##                                                                   Aashik Aawara 
##                                                                               1 
##                                                     The Trial of Adolf Eichmann 
##                                                                               2 
##                                                                            Qila 
##                                                                               2 
##                                                    Nightmare in Columbia County 
##                                                                               1 
##                                                   Sinbad: Son of a Preacher Man 
##                                                                               1 
##                                                                    Breaking Bad 
##                                                                               1 
##                                                                The Walking Dead 
##                                                                               1 
##                                                                  Grey's Anatomy 
##                                                                               1 
##                                                            Arrested Development 
##                                                                               1 
##                                                                       Community 
##                                                                               1 
##                                                                            NCIS 
##                                                                               1 
##                                                      Avatar: The Last Airbender 
##                                                                               1 
##                                                                    Supernatural 
##                                                                               1 
##                                                                        Top Gear 
##                                                                               2 
##                               The Lord of the Rings: The Fellowship of the Ring 
##                                                                               1 
##                                                   The Great British Baking Show 
##                                                                               2 
##                                                             Friday Night Lights 
##                                                                               2 
##                                                                    The IT Crowd 
##                                                                               2 
##                                                                 Minority Report 
##                                                                               1 
##                                                     Scott Pilgrim vs. the World 
##                                                                               1 
##                                                                 The Dark Knight 
##                                                                               1 
##                                                                The Amazing Race 
##                                                                               2 
##                                                                       Heartland 
##                                                                               2 
##                                                                      DEATH NOTE 
##                                                                               1 
##                                                                         Monster 
##                                                                               2 
##                                                                    The Hangover 
##                                                                               1 
##                                                                          Naruto 
##                                                                               1 
##                                           The Lord of the Rings: The Two Towers 
##                                                                               1 
##                                   The Lord of the Rings: The Return of the King 
##                                                                               1 
##                                                                        Mononoke 
##                                                                               2 
##                                                              Brokeback Mountain 
##                                                                               1 
##                                                                Chappelle's Show 
##                                                                               2 
##                                                                        The Mist 
##                                                                               1 
##                                                                  Ancient Aliens 
##                                                                               2 
##                                                               Road to Perdition 
##                                                                               1 
##                                                                          Easy A 
##                                                                               1 
##                                                                        The Mole 
##                                                                               2 
##                                                        The Pursuit of Happyness 
##                                                                               1 
##                                                                American Pickers 
##                                                                               2 
##                                            Code Geass: Lelouch of the Rebellion 
##                                                                               2 
##                                                                      Pawn Stars 
##                                                                               2 
##                                             My Little Pony: Friendship Is Magic 
##                                                                               2 
##                                                                          Closer 
##                                                                               1 
##                                                                     The Aviator 
##                                                                               1 
##                                                                       The Hills 
##                                                                               2 
##                                                                       Burlesque 
##                                                                               2 
##                                                                          Borgen 
##                                                                               2 
##                                                             Transformers: Prime 
##                                                                               2 
##                                                                          iCarly 
##                                                                               2 
##                                                      Zathura: A Space Adventure 
##                                                                               2 
##                                                                The Longest Yard 
##                                                                               2 
##                                                                          Ip Man 
##                                                                               1 
##                                        Ned's Declassified School Survival Guide 
##                                                                               2 
##                                                                   Secret Window 
##                                                                               2 
##                                                                 A Knight's Tale 
##                                                                               1 
##                                                                      Underworld 
##                                                                               2 
##                                             Transformers: Revenge of the Fallen 
##                                                                               1 
##                                                                   Julie & Julia 
##                                                                               2 
##                                                                Tears of the Sun 
##                                                                               2 
##                                                                    Laguna Beach 
##                                                                               2 
##                                                                Along Came Polly 
##                                                                               2 
##                                                           The Fairly OddParents 
##                                                                               2 
##                                                                  Midnight Diner 
##                                                                               2 
##                                                                   The Staircase 
##                                                                               2 
##                                                                   Resident Evil 
##                                                                               2 
##                                                                  Hidden Passion 
##                                                                               2 
##                                                     G.I. Joe: The Rise of Cobra 
##                                                                               2 
##                                                                      Victorious 
##                                                                               2 
##                                                                    Flushed Away 
##                                                                               2 
##                                                               Trailer Park Boys 
##                                                                               2 
##                                                                      Seabiscuit 
##                                                                               2 
##                                                                   Monster House 
##                                                                               2 
##                                                                       King Kong 
##                                                                               1 
##                                                              Total Drama Island 
##                                                                               2 
##                                                                         Sonic X 
##                                                                               2 
##                                                              The Order of Myths 
##                                                                               2 
##                                                                       Winx Club 
##                                                                               2 
##                                                                              21 
##                                                                               1 
##                                                                        Zoey 101 
##                                                                               2 
##                                                                       Spanglish 
##                                                                               2 
##                                                Spirit: Stallion of the Cimarron 
##                                                                               2 
##                                                 kimi ni todoke -From Me to You- 
##                                                                               2 
##                                                                       Leap Year 
##                                                                               2 
##                                                                          Enough 
##                                                                               2 
##                                                                           Gamer 
##                                                                               2 
##                                                                The Pink Panther 
##                                                                               2 
##                                                                        3 Idiots 
##                                                                               1 
##                                                                   Gridiron Gang 
##                                                                               2 
##                                                       Resident Evil: Apocalypse 
##                                                                               2 
##                                                        Resident Evil: Afterlife 
##                                                                               2 
##                                                                    Seven Pounds 
##                                                                               1 
##                                                             The Legend of Zorro 
##                                                                               2 
##                                                               Autumn's Concerto 
##                                                                               2 
##                                                                      Still Game 
##                                                                               2 
##                                                                    Black Butler 
##                                                                               2 
##                                                                     Nora's Will 
##                                                                               2 
##                                                                  An Elf's Story 
##                                                                               2 
##                                                               Boys Over Flowers 
##                                                                               2 
##                                                                             Don 
##                                                                               2 
##                                                                       Toradora! 
##                                                                               2 
##                                                     Ouran High School Host Club 
##                                                                               2 
##                                                                 Rang De Basanti 
##                                                                               2 
##                                                            Daddy's Little Girls 
##                                                                               2 
##                                                           The Other Boleyn Girl 
##                                                                               2 
##                                                                          Swades 
##                                                                               2 
##                                                                   Bo on the Go! 
##                                                                               2 
##                                Trailer Park Boys: Say Goodnight to the Bad Guys 
##                                                                               2 
##                                                                     New in Town 
##                                                                               2 
##                                                                           Dev.D 
##                                                                               2 
##                             Mike Birbiglia: What I Should Have Said Was Nothing 
##                                                                               2 
##                                                                      Code Lyoko 
##                                                                               2 
##                                                                        The Game 
##                                                                               2 
##                                                                      One on One 
##                                                                               2 
##                                                                  Daddy Day Care 
##                                                                               2 
##                                              You Will Meet a Tall Dark Stranger 
##                                                                               2 
##                                                                   Eat Pray Love 
##                                                                               2 
##                                                          Nuevo Rico Nuevo Pobre 
##                                                                               2 
##                                                             H2O: Just Add Water 
##                                                                               2 
##                                           Einsatzgruppen: The Nazi Death Squads 
##                                                                               2 
##                                                                 The Fierce Wife 
##                                                                               2 
##                                                                           Asoka 
##                                                                               2 
##                                                                    A Wednesday! 
##                                                                               2 
##                                                                           Chloe 
##                                                                               2 
##                                                                      The Cartel 
##                                                                               2 
##                                                             Hachi: A Dog's Tale 
##                                                                               1 
##                                                              Monsters vs Aliens 
##                                                                               2 
##                                                         The Legend of Bruce Lee 
##                                                                               2 
##                                                                Basketball Wives 
##                                                                               2 
##                                                           Louis C.K.: Hilarious 
##                                                                               2 
##                                                      The Boy Who Cried Werewolf 
##                                                                               2 
##                                                      The Taking of Pelham 1 2 3 
##                                                                               2 
##                                                                       Initial D 
##                                                                               2 
##                                                               The Garfield Show 
##                                                                               2 
##                                              Kevin James: Sweat the Small Stuff 
##                                                                               2 
##                                                                     Duck Season 
##                                                                               2 
##                                                                     Half & Half 
##                                                                               2 
##                                                                   Big Time Rush 
##                                                                               2 
##                                                                          Pocoyo 
##                                                                               2 
##                                                                    Love Aaj Kal 
##                                                                               2 
##                                                                        Accident 
##                                                                               2 
##                                                                   Black & White 
##                                                                               2 
##                                                                     El Escamoso 
##                                                                               2 
##                                               I Now Pronounce You Chuck & Larry 
##                                                                               2 
##                                                   Jim Gaffigan: Beyond the Pale 
##                                                                               2 
##                                                                 Phir Hera Pheri 
##                                                                               2 
##                                                    Trailer Park Boys: The Movie 
##                                                                               2 
##                                                      The Legend of Bhagat Singh 
##                                                                               2 
##                                                                     Anchor Baby 
##                                                                               2 
##                                                                              RV 
##                                                                               2 
##                                                                           Billu 
##                                                                               2 
##                                                              Masha and the Bear 
##                                                                               2 
##                                                           Madness in the Desert 
##                                                                               2 
##                                                                            Guru 
##                                                                               2 
##                                                                 Golmaal Returns 
##                                                                               2 
##                                                         One Piece: Strong World 
##                                                                               2 
##                                                         The Figurine: Araromire 
##                                                                               2 
##                                                                      Jab We Met 
##                                                                               2 
##                                                         Jaane Tu... Ya Jaane Na 
##                                                                               2 
##                                                                         Banyuki 
##                                                                               2 
##                                                                      Kath & Kim 
##                                                                               2 
##                                                                    Den-noh Coil 
##                                                                               2 
##                                                               Palermo Hollywood 
##                                                                               2 
##                                                                     Johnny Test 
##                                                                               2 
##                                                                         Falafel 
##                                                                               2 
##                                                      Ajab Prem Ki Ghazab Kahani 
##                                                                               2 
##                                                                     Cairo 6,7,8 
##                                                                               2 
##                                                                     Open Season 
##                                                                               2 
##                                                        Kabhi Khushi Kabhie Gham 
##                                                                               2 
##                                                             Like Stars on Earth 
##                                                                               2 
##                                                                       Astro Boy 
##                                                                               2 
##                                                                         Fashion 
##                                                                               2 
##                                                                   Kal Ho Naa Ho 
##                                                                               2 
##                                                    Ben & Holly's Little Kingdom 
##                                                                               2 
##                                                                       Octonauts 
##                                                                               2 
##                                                         Manorama Six Feet Under 
##                                                                               2 
##                                                                   Open Season 2 
##                                                                               2 
##                                      Trailer Park Boys: Countdown to Liquor Day 
##                                                                               2 
##                                                                         Ishqiya 
##                                                                               2 
##                                                         Jim Gaffigan: King Baby 
##                                                                               2 
##                                                                   Dinosaur King 
##                                                                               2 
##                                                              The Pink Panther 2 
##                                                                               2 
##                                                                 Once a Gangster 
##                                                                               2 
##                                                                            Fida 
##                                                                               2 
##                                                                Ijé: The Journey 
##                                                                               2 
##                                                                        Comedian 
##                                                                               2 
##                                                                       My Führer 
##                                                                               2 
##                                                                       Raajneeti 
##                                                                               2 
##                                                                Salt of This Sea 
##                                                                               2 
##                                                                     Wake Up Sid 
##                                                                               2 
##                                                               National Security 
##                                                                               2 
##                                                                   Made of Honor 
##                                                                               2 
##                                               Lagaan: Once Upon a Time in India 
##                                                                               2 
##                                                              Friends with Money 
##                                                                               2 
##                                                              Da Kath & Kim Code 
##                                                                               2 
##                                                                        G.O.R.A. 
##                                                                               2 
##                                                                  The Stepfather 
##                                                                               2 
##                                                                          Paheli 
##                                                                               2 
##                                                                      One 2 Ka 4 
##                                                                               2 
##                                  Naruto the Movie: Legend of the Stone of Gelel 
##                                                                               2 
##                                                                           Wakfu 
##                                                                               2 
##                                                          Kabhi Alvida Naa Kehna 
##                                                                               2 
##                                              Kevin Hart: I'm a Grown Little Man 
##                                                                               2 
##                                                             Tayo the Little Bus 
##                                                                               2 
##                                                                   Chalte Chalte 
##                                                                               2 
##                                                                  Daddy Day Camp 
##                                                                               2 
##                                                      I Can Do Bad All By Myself 
##                                                                               2 
##                                                                        Ip Man 2 
##                                                                               2 
##                        Naruto the Movie: Guardians of the Crescent Moon Kingdom 
##                                                                               2 
##                                                       Pororo the Little Penguin 
##                                                                               2 
##                                                       The Legend of the Nahuala 
##                                                                               2 
##                                                                    Om Shanti Om 
##                                                                               2 
##                                                                            Iris 
##                                                                               2 
##                                                                    Jodhaa Akbar 
##                                                                               2 
##                                                                   Beast Stalker 
##                                                                               2 
##                                                                  Dil Chahta Hai 
##                                                                               2 
##                                                                       Rock On!! 
##                                                                               2 
##                                                               I Hate Luv Storys 
##                                                                               2 
##                                                                  Luck by Chance 
##                                                                               2 
##                                                                    Chhota Bheem 
##                                                                               2 
##                                               Naruto Shippuden the Movie: Bonds 
##                                                                               2 
##                                    Naruto Shippuden the Movie: The Will of Fire 
##                                                                               2 
##                                                  Barbershop 2: Back in Business 
##                                                                               2 
##                                     Legend of the Fist: The Return of Chen Zhen 
##                                                                               2 
##                                                                    Main Hoon Na 
##                                                                               2 
##                                                                 Bon Cop Bad Cop 
##                                                                               2 
##                                                                 Look for a Star 
##                                                                               2 
##                                                               A Romantic Comedy 
##                                                                               2 
##                                                                 Vientos de agua 
##                                                                               2 
##                               Naruto the Movie: Ninja Clash in the Land of Snow 
##                                                                               2 
##                                                               Fated to Love You 
##                                                                               2 
##                                                                         Lakshya 
##                                                                               2 
##                                                                            Kaal 
##                                                                               2 
##                                                                This Is the Life 
##                                                                               2 
##                                                                    Sir! No Sir! 
##                                                                               2 
##                             Inuyasha the Movie: Affections Touching Across Time 
##                                                                               2 
##                                 Inuyasha the Movie 4: Fire on the Mystic Island 
##                                                                               2 
##                                                                          Arahan 
##                                                                               2 
##                                                                      Waist Deep 
##                                                                               2 
##                                                                       Connected 
##                                                                               2 
##                                                         Karthik Calling Karthik 
##                                                                               2 
##                                                            Dhan Dhana Dhan Goal 
##                                                                               2 
##                                                The Prince Who Turns into a Frog 
##                                                                               2 
##                                                                     Peepli Live 
##                                                                               2 
##                                                                 Life in a Metro 
##                                                                               2 
##                                                    Monty Python's Personal Best 
##                                                                               2 
##                                                                        The Kite 
##                                                                               2 
##                                                                           Rabun 
##                                                                               2 
##                              Inuyasha the Movie 3: Swords of an Honorable Ruler 
##                                                                               2 
##                       Inuyasha the Movie 2: The Castle Beyond the Looking Glass 
##                                                                               2 
##                                                                   Twins Mission 
##                                                                               2 
##                                                                         Kaminey 
##                                                                               2 
##                                                                  Love in a Puff 
##                                                                               2 
##                                                Jeff Dunham: Arguing with Myself 
##                                                                               2 
##                                    Jeff Dunham's Very Special Christmas Special 
##                                                                               2 
##                                                                 Under the Bombs 
##                                                                               2 
##                                                          Pomegranates and Myrrh 
##                                                                               2 
##                                                                            Tito 
##                                                                               2 
##                                                                    A Love Story 
##                                                                               2 
##                                                      Naruto Shippuden the Movie 
##                                                                               2 
##                                      Naruto Shippuden the Movie: The Lost Tower 
##                                                                               2 
##                                                                  Zig and Sharko 
##                                                                               2 
##           One Piece: The Desert Princess and the Pirates: Adventure in Alabasta 
##                                                                               2 
## One Piece: Episode of Chopper Plus: Bloom in the Winter, Miracle Cherry Blossom 
##                                                                               2 
##                                                                 The Mafia Dolls 
##                                                                               2 
##                                                            The Magic Roundabout 
##                                                                               2 
##                                                   Kung Fu Panda Awesome Secrets 
##                                                                               2 
##                                                                Sivaji: The Boss 
##                                                                               2 
##                                                                         Welcome 
##                                                                               2 
##                                                                      The Unjust 
##                                                                               2 
##                                                                 Oscar's Oasis 2 
##                                                                               2 
##                                                                  Eternal Summer 
##                                                                               2 
##                                                DreamWorks Shrek's Swamp Stories 
##                                                                               2 
##                                                                       Vizontele 
##                                                                               2 
##                                                                    Chup Chup Ke 
##                                                                               2 
##                                                            Awara Paagal Deewana 
##                                                                               2 
##                                                                           Udaan 
##                                                                               2 
##                                                           Kannathil Muthamittal 
##                                                                               2 
##                                                                   Africa United 
##                                                                               2 
##                                                                         Delhi-6 
##                                                                               2 
##                                                     Honeymoon Travels Pvt. Ltd. 
##                                                                               2 
##                               Monty Python: Almost the Truth (The Lawyer's Cut) 
##                                                                               2 
##                                                                      Anbe Sivam 
##                                                                               2 
##                                                                   We Are Family 
##                                                                               2 
##                                                             Divine Intervention 
##                                                                               2 
##                                                                    The Hospital 
##                                                                               2 
##                                                        Chhota Bheem Aur Krishna 
##                                                                               2 
##                                              The Trailer Park Boys Xmas Special 
##                                                                               2 
##                                                                         Mukhsin 
##                                                                               2 
##                                                                 Taxi No. 9 2 11 
##                                                                               2 
##                                                                            Yuva 
##                                                                               2 
##                                                                      Ishq Vishk 
##                                                                               2 
##                                                                           Booha 
##                                                                               2 
##                                                                  Do Dooni Chaar 
##                                                                               2 
##                                                             Anukokunda Oka Roju 
##                                                                               2 
##                                                   George Lopez: Why You Crying? 
##                                                                               2 
##                                                                       Talentime 
##                                                                               2 
##                                                             A Lion in the House 
##                                                                               2 
##                                                                     The Hostage 
##                                                                               2 
##                                                              Taimour & Shafi'aa 
##                                                                               2 
##                                                                            Ezra 
##                                                                               2 
##                                                                          Parugu 
##                                                                               2 
##                                                                         Katkout 
##                                                                               2 
##                                                             Soldier In the Camp 
##                                                                               2 
##                                                                       The Ghost 
##                                                                               2 
##                                                                         Kuselan 
##                                                                               2 
##                                                                            Zozo 
##                                                                               2 
##                                                             A Natural Born Fool 
##                                                                               2 
##                                                                   The Ultimatum 
##                                                                               2 
##                                                                 My Amnesia Girl 
##                                                                               2 
##                                                              Private Alexandria 
##                                                                               2 
##                                                                Transit Prisoner 
##                                                                               2 
##                                                                    Italia's War 
##                                                                               2 
##                                                                       Te quiero 
##                                                                               2 
##                                                   The Great Fava Beans of China 
##                                                                               2 
##                                                                        Soul boy 
##                                                                               2 
##                                                          About Love and Passion 
##                                                                               2 
##                                                             A Very Special Love 
##                                                                               2 
##                                                                   Frank & Cindy 
##                                                                               2 
##                                                                      The Island 
##                                                                               2 
##                                                                           Bosta 
##                                                                               2 
##                                                                       Pink Zone 
##                                                                               2 
##                           Chhota Bheem & Krishna: Pataliputra- City of the Dead 
##                                                                               2 
##                                                   Frontiers of Dreams and Fears 
##                                                                               2 
##                                                                Encrypted Letter 
##                                                                               2 
##                                                                    Pravarakyudu 
##                                                                               2 
##                                                                       Shameless 
##                                                                               1
# Inspect the cluster labels held in `classes`.
# View() opens a GUI data viewer, so it is only called from an interactive
# session; in a non-interactive run (Rscript, headless render) it can fail.
if (interactive()) {
  View(classes)
}
# Number of observations per cluster.
table(classes)
## classes
##   1   2 
## 172 328
# Attach the CAH cluster labels to the first 500 scaled observations.
# The factor column is named when the frame is built, instead of being
# renamed afterwards through a hard-coded column position.
d.class <- cbind.data.frame(
  scaled_data[1:500, ],
  classes.cah = as.factor(classes)
)
# View() needs a GUI data viewer; skip it in non-interactive runs.
if (interactive()) {
  View(d.class)
}
# Describe the clusters. The cluster column is located by name so the call
# keeps working if the number of feature columns changes.
catdes(d.class, num.var = match("classes.cah", colnames(d.class)))
## 
## Link between the cluster variable and the quantitative variables
## ================================================================
##                       Eta2      P-value
## release_year    0.47038726 9.584151e-71
## imdb_votes      0.08513007 2.868563e-11
## runtime         0.04478746 1.801170e-06
## tmdb_popularity 0.03252361 4.996272e-05
## 
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
##                     v.test Mean in category Overall mean sd in category
## imdb_votes        6.517661        1.6409586    0.5849771       4.223044
## runtime           4.727467        0.8592783    0.4954530       1.128785
## tmdb_popularity   4.028559        0.4949743    0.1413928       2.305350
## release_year    -15.320680       -3.9625761   -2.4827403       1.765473
##                 Overall sd      p.value
## imdb_votes        2.620850 7.141223e-11
## runtime           1.244922 2.273384e-06
## tmdb_popularity   1.419768 5.611989e-05
## release_year      1.562475 5.563218e-53
## 
## $`2`
##                    v.test Mean in category Overall mean sd in category
## release_year    15.320680      -1.70672893   -2.4827403      0.5800896
## tmdb_popularity -4.028559      -0.04402192    0.1413928      0.4311546
## runtime         -4.727467       0.30466657    0.4954530      1.2603880
## imdb_votes      -6.517661       0.03123074    0.5849771      0.4768679
##                 Overall sd      p.value
## release_year      1.562475 5.563218e-53
## tmdb_popularity   1.419768 5.611989e-05
## runtime           1.244922 2.273384e-06
## imdb_votes        2.620850 7.141223e-11
library(factoextra)
# Visualise the partition: the first 500 scaled observations, with cluster
# membership taken from `classes`.
fviz_cluster(
  object = list(
    data = scaled_data[1:500, ],
    cluster = classes
  )
)

# Apply k-means

# Fix the RNG seed: kmeans() picks its initial centres at random, so without
# a seed the partition (and the cluster numbering) can change between runs.
set.seed(123)
# NOTE: the result keeps the name `kmeans`, as in the original script. The
# function is called as stats::kmeans() to make clear that the function, not
# this object, is meant.
kmeans <- stats::kmeans(scaled_data[1:500, ], centers = 2, iter.max = 1000)

# Add the k-means cluster labels to the data set

d.class$classes.kmeans <- as.factor(kmeans$cluster)
# Locate the k-means label column by name rather than by a hard-coded index.
res.cat1 <- catdes(
  d.class,
  num.var = match("classes.kmeans", colnames(d.class))
)
res.cat1
## 
## Link between the cluster variable and the categorical variables (chi-square test)
## =================================================================================
##                  p.value df
## classes.cah 8.233625e-05  1
## 
## Description of each cluster by the categories
## =============================================
## $`1`
##                 Cla/Mod  Mod/Cla Global      p.value    v.test
## classes.cah=2 100.00000 66.66667   65.6 0.0001758575  3.751389
## classes.cah=1  95.34884 33.33333   34.4 0.0001758575 -3.751389
## 
## $`2`
##                Cla/Mod Mod/Cla Global      p.value    v.test
## classes.cah=1 4.651163     100   34.4 0.0001758575  3.751389
## classes.cah=2 0.000000       0   65.6 0.0001758575 -3.751389
## 
## 
## Link between the cluster variable and the quantitative variables
## ================================================================
##                       Eta2       P-value
## imdb_votes      0.76056470 1.071825e-156
## tmdb_popularity 0.07727822  2.522807e-10
## imdb_score      0.04930540  5.290466e-07
## tmdb_score      0.03417148  3.195862e-05
## 
## Description of each cluster by quantitative variables
## =====================================================
## $`1`
##                     v.test Mean in category Overall mean sd in category
## tmdb_score       -4.129354       0.08111544    0.1032642      0.9306130
## imdb_score       -4.960181       0.27116263    0.2985350      0.9491248
## tmdb_popularity  -6.209817       0.09106493    0.1413928      1.2798909
## imdb_votes      -19.481319       0.29352137    0.5849771      1.0910835
##                 Overall sd      p.value
## tmdb_score       0.9396260 3.637833e-05
## imdb_score       0.9667259 7.042758e-07
## tmdb_popularity  1.4197684 5.304620e-10
## imdb_votes       2.6208498 1.581527e-84
## 
## $`2`
##                    v.test Mean in category Overall mean sd in category
## imdb_votes      19.481319        18.509507    0.5849771      5.4384636
## tmdb_popularity  6.209817         3.236556    0.1413928      3.9374680
## imdb_score       4.960181         1.981938    0.2985350      0.3584530
## tmdb_score       4.129354         1.465413    0.1032642      0.1842568
##                 Overall sd      p.value
## imdb_votes       2.6208498 1.581527e-84
## tmdb_popularity  1.4197684 5.304620e-10
## imdb_score       0.9667259 7.042758e-07
## tmdb_score       0.9396260 3.637833e-05

# Check the choice of the number of clusters for CAH and k-means

library(clValid)
# NOTE(review): clValid() also takes a `maxitems` argument; confirm that this
# option is actually read by the package.
options(clValid.maxitems = 500)
# The 500-row subset is computed once and reused in the validation call
# (it was previously assigned but never used).
subset_data <- scaled_data[1:500, ]
# Internal validation measures for 2 to 6 clusters, comparing hierarchical
# clustering and k-means.
intern1 <- clValid(
  subset_data,
  nClust = 2:6,
  clMethods = c("hierarchical", "kmeans"),
  validation = "internal"
)
summary(intern1)
## 
## Clustering Methods:
##  hierarchical kmeans 
## 
## Cluster sizes:
##  2 3 4 5 6 
## 
## Validation Measures:
##                                  2       3       4       5       6
##                                                                   
## hierarchical Connectivity   4.2143 10.1143 12.7726 14.9810 18.2238
##              Dunn           0.3661  0.2511  0.2511  0.2511  0.2511
##              Silhouette     0.8236  0.7123  0.6794  0.6417  0.6393
## kmeans       Connectivity   8.7710 14.0627 41.9881 81.3266 84.5694
##              Dunn           0.1825  0.1562  0.0647  0.0379  0.0379
##              Silhouette     0.8067  0.6941  0.3607  0.3322  0.3289
## 
## Optimal Scores:
## 
##              Score  Method       Clusters
## Connectivity 4.2143 hierarchical 2       
## Dunn         0.3661 hierarchical 2       
## Silhouette   0.8236 hierarchical 2

# Supervised learning: random forest regression

# Convert `scaled_data` to a data frame so its columns can be read with `$`.
scaled_data <- as.data.frame(scaled_data)
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
## 
##     lift
# Fix the RNG seed so the random split (and anything random that follows,
# such as the forest fit) is the same on every run.
set.seed(123)
# Row indices of the training partition: 70% of the rows, drawn by caret
# based on `imdb_score`.
ind <- createDataPartition(
  scaled_data$imdb_score,
  times = 1,
  p = 0.7,
  list = FALSE
)
# `a` is the training set and `t` the held-out set. The names are kept because
# the model fit and the evaluation further down read them.
a <- scaled_data[ind, ]
t <- scaled_data[-ind, ]
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
## 
##     combine
## The following object is masked from 'package:ggplot2':
## 
##     margin
# Random forest regression of imdb_score on three predictors, fitted on `a`.
model <- randomForest(
  imdb_score ~ release_year + imdb_votes + runtime,
  data = a
)

# Evaluation with the MSE criterion

# Predict on the held-out rows `t`, then average the squared errors.
predictions <- predict(model, newdata = t)
MSE <- mean((predictions - t$imdb_score)^2)
MSE
## [1] 0.7680208
# Export the cleaned data set; row names are written as the first column.
write.csv(
  data_clean,
  file = "final_data.csv",
  row.names = TRUE
)